Confirmatory Factor Analysis and Structural Equation Models

Work in Progress

cfa
sem
measurment
library(lavaan)
library(dplyr)
library(reticulate)
library(marginaleffects)
library(modelsummary)
library(ggplot2)
library(egg)
library(lme4)
library(semPlot)
library(tinytable)
library(kableExtra)
library(reshape2)
reticulate::py_run_string("import pymc as pm")

options(rstudio.python.installationPath = "/Users/nathanielforde/mambaforge/envs")
options("modelsummary_factory_default" = "tinytable")
options(repr.plot.width=15, repr.plot.height=8)

knitr::knit_engines$set(python = reticulate::eng_python)
options(scipen=999)
set.seed(130)
Warning

This is a work in progress. We intend to elaborate the choices related to analysing and understanding the data elicted through intentional psychometrics surveys

Measurment and Measurment Constructs

df = read.csv('sem_data.csv')
inflate <- df$region == 'west'
noise <- rnorm(sum(inflate), 0.5, 1) # generate the noise to add
df$ls_p3[inflate] <- df$ls_p3[inflate] + noise
df$ls_sum <- df$ls_p1 + df$ls_p2 + df$ls_p3
df$ls_mean <- rowMeans(df[ c('ls_p1', 'ls_p2', 'ls_p3')])

head(df) |> kable()
ID region gender age se_acad_p1 se_acad_p2 se_acad_p3 se_social_p1 se_social_p2 se_social_p3 sup_friends_p1 sup_friends_p2 sup_friends_p3 sup_parents_p1 sup_parents_p2 sup_parents_p3 ls_p1 ls_p2 ls_p3 ls_sum ls_mean
1 west female 13 4.857143 5.571429 4.500000 5.80 5.500000 5.40 6.5 6.5 7.0 7.0 7.0 6.0 5.333333 6.75 7.647112 19.73044 6.576815
2 west male 14 4.571429 4.285714 4.666667 5.00 5.500000 4.80 4.5 4.5 5.5 5.0 6.0 4.5 4.333333 5.00 4.469623 13.80296 4.600985
10 west female 14 4.142857 6.142857 5.333333 5.20 4.666667 6.00 4.0 4.5 3.5 7.0 7.0 6.5 6.333333 5.50 4.710020 16.54335 5.514451
11 west female 14 5.000000 5.428571 4.833333 6.40 5.833333 6.40 7.0 7.0 7.0 7.0 7.0 7.0 4.333333 6.50 5.636198 16.46953 5.489844
12 west female 14 5.166667 5.600000 4.800000 5.25 5.400000 5.25 7.0 7.0 7.0 6.5 6.5 7.0 5.666667 6.00 5.266592 16.93326 5.644419
14 west male 14 4.857143 4.857143 4.166667 5.20 5.000000 4.20 5.5 6.5 7.0 6.5 6.5 6.5 5.000000 5.50 5.913709 16.41371 5.471236

Candidate Structure

#| code-fold: true
#| code-summary: "Show the code"

datasummary_skim(df)|> 
 style_tt(
   i = 15:17,
   j = 1:1,
   background = "#20AACC",
   color = "white",
   italic = TRUE) |> 
 style_tt(
   i = 18:19,
   j = 1:1,
   background = "#2888A0",
   color = "white",
   italic = TRUE) |> 
 style_tt(
   i = 2:14,
   j = 1:1,
   background = "#17C2AD",
   color = "white",
   italic = TRUE)
tinytable_ftyu5q15n5ibjk84ch05
Unique Missing Pct. Mean SD Min Median Max Histogram
ID 283 0 187.9 106.3 1.0 201.0 367.0
age 5 0 14.7 0.8 13.0 15.0 17.0
se_acad_p1 32 0 5.2 0.8 3.1 5.1 7.0
se_acad_p2 36 0 5.3 0.7 3.1 5.4 7.0
se_acad_p3 29 0 5.2 0.8 2.8 5.2 7.0
se_social_p1 24 0 5.3 0.8 1.8 5.4 7.0
se_social_p2 27 0 5.5 0.7 2.7 5.5 7.0
se_social_p3 31 0 5.4 0.8 3.0 5.5 7.0
sup_friends_p1 13 0 5.8 1.1 1.0 6.0 7.0
sup_friends_p2 10 0 6.0 0.9 2.5 6.0 7.0
sup_friends_p3 13 0 6.0 1.1 1.0 6.0 7.0
sup_parents_p1 11 0 6.0 1.1 1.0 6.0 7.0
sup_parents_p2 11 0 5.9 1.1 2.0 6.0 7.0
sup_parents_p3 13 0 5.7 1.1 1.0 6.0 7.0
ls_p1 15 0 5.2 0.9 2.0 5.3 7.0
ls_p2 21 0 5.8 0.7 2.5 5.8 7.0
ls_p3 161 0 5.5 1.1 1.7 5.6 9.6
ls_sum 218 0 16.5 2.2 8.2 16.7 21.0
ls_mean 217 0 5.5 0.7 2.7 5.6 7.0
N %
region east 142 50.2
west 141 49.8
gender female 132 46.6
male 151 53.4
drivers = c('se_acad_p1', 'se_acad_p2', 'se_acad_p3', 'se_social_p1', 'se_social_p2', 'se_social_p3', 'sup_friends_p1','sup_friends_p2', 'sup_friends_p3', 'sup_parents_p1' , 'sup_parents_p2' , 'sup_parents_p3', 'ls_p1', 'ls_p2', 'ls_p3')



plot_heatmap <- function(df, title="Sample Covariances", subtitle="Observed Measures") {
  heat_df = df |> as.matrix() |> melt()
  colnames(heat_df) <- c("x", "y", "value")
  g <- heat_df |> mutate(value = round(value, 2)) |>  ggplot(aes(x = x, y = y, fill = value)) +
    geom_tile() + geom_text(aes(label = value), color = "black", size = 4) +
   scale_fill_gradient2(
      high = 'dodgerblue4',
      mid = 'white',
      low = 'firebrick2'
    ) + theme(axis.text.x = element_text(angle=45)) + ggtitle(title, subtitle)
  
  g
}

g1 = plot_heatmap(cov(df[,  drivers]))

g2 = plot_heatmap(cor(df[,  drivers]), "Sample Correlations")

plot <- ggarrange(g1,g2, ncol=1, nrow=2);

Fit Initial Regression Models

formula_sum_1st = " ls_sum ~ se_acad_p1  + se_social_p1 +  sup_friends_p1  + sup_parents_p1"
formula_mean_1st = " ls_mean ~ se_acad_p1  + se_social_p1 +  sup_friends_p1  + sup_parents_p1"

formula_sum_12 = " ls_sum ~ se_acad_p1  + se_acad_p2 +  se_social_p1 + se_social_p2 + 
sup_friends_p1 + sup_friends_p2  + sup_parents_p1 + sup_parents_p2"
formula_mean_12 = " ls_mean ~ se_acad_p1  + se_acad_p2 +  se_social_p1 + se_social_p2 + 
sup_friends_p1 + sup_friends_p2  + sup_parents_p1 + sup_parents_p2"


formula_sum = " ls_sum ~ se_acad_p1 + se_acad_p2 + se_acad_p3 + se_social_p1 +  se_social_p2 + se_social_p3 +  sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + sup_parents_p1 + sup_parents_p2 + sup_parents_p3"
formula_mean = " ls_mean ~ se_acad_p1 + se_acad_p2 + se_acad_p3 + se_social_p1 +  se_social_p2 + se_social_p3 +  sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + sup_parents_p1 + sup_parents_p2 + sup_parents_p3"
mod_sum = lm(formula_sum, df)
mod_sum_1st = lm(formula_sum_1st, df)
mod_sum_12 = lm(formula_sum_12, df)
mod_mean = lm(formula_mean, df)
mod_mean_1st = lm(formula_mean_1st, df)
mod_mean_12 = lm(formula_mean_12, df)

min_max_norm <- function(x) {
    (x - min(x)) / (max(x) - min(x))
}

df_norm <- as.data.frame(lapply(df[c(5:19)], min_max_norm))

df_norm$ls_sum <- df$ls_sum
df_norm$ls_mean <- df$ls_mean

mod_sum_norm = lm(formula_sum, df_norm)
mod_mean_norm = lm(formula_mean, df_norm)

models = list(
    "Outcome: sum_score" = list("model_sum_1st_factors" = mod_sum_1st,
     "model_sum_1st_2nd_factors" = mod_sum_12,
     "model_sum_score" = mod_sum,
     "model_sum_score_norm" = mod_sum_norm
     ),
    "Outcome: mean_score" = list(
      "model_mean_1st_factors" = mod_mean_1st,
     "model_mean_1st_2nd_factors" = mod_mean_12,
     "model_mean_score"= mod_mean, 
     "model_mean_score_norm" = mod_mean_norm
    )
    )
#| code-fold: true
#| code-summary: "Show the code"

modelsummary(models, stars=TRUE, shape ="cbind") |> 
 style_tt(
   i = 2:25,
   j = 1:1,
   background = "#17C2AD",
   color = "white",
   italic = TRUE)
tinytable_jb9uvnwer8qg2neciem3
Outcome: sum_score Outcome: mean_score
model_sum_1st_factors model_sum_1st_2nd_factors model_sum_score model_sum_score_norm model_mean_1st_factors model_mean_1st_2nd_factors model_mean_score model_mean_score_norm
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 6.335*** 4.038*** 3.709*** 9.081*** 2.112*** 1.346*** 1.236*** 3.027***
(0.979) (1.080) (1.071) (0.739) (0.326) (0.360) (0.357) (0.246)
se_acad_p1 0.231 -0.023 -0.068 -0.264 0.077 -0.008 -0.023 -0.088
(0.158) (0.197) (0.216) (0.833) (0.053) (0.066) (0.072) (0.278)
se_social_p1 1.039*** 0.515* 0.401+ 2.085+ 0.346*** 0.172* 0.134+ 0.695+
(0.175) (0.224) (0.224) (1.167) (0.058) (0.075) (0.075) (0.389)
sup_friends_p1 0.069 -0.263+ -0.273 -1.636 0.023 -0.088+ -0.091 -0.545
(0.095) (0.145) (0.169) (1.011) (0.032) (0.048) (0.056) (0.337)
sup_parents_p1 0.518*** 0.196 0.050 0.299 0.173*** 0.065 0.017 0.100
(0.107) (0.155) (0.161) (0.964) (0.036) (0.052) (0.054) (0.321)
se_acad_p2 0.415+ 0.382+ 1.475+ 0.138+ 0.127+ 0.492+
(0.216) (0.227) (0.875) (0.072) (0.076) (0.292)
se_social_p2 0.662** 0.483+ 2.093+ 0.221** 0.161+ 0.698+
(0.233) (0.246) (1.065) (0.078) (0.082) (0.355)
sup_friends_p2 0.358* 0.366* 1.647* 0.119* 0.122* 0.549*
(0.172) (0.180) (0.808) (0.057) (0.060) (0.269)
sup_parents_p2 0.375* 0.166 0.829 0.125* 0.055 0.276
(0.151) (0.162) (0.808) (0.050) (0.054) (0.269)
se_acad_p3 -0.072 -0.302 -0.024 -0.101
(0.196) (0.815) (0.065) (0.272)
se_social_p3 0.350+ 1.400+ 0.117+ 0.467+
(0.180) (0.721) (0.060) (0.240)
sup_friends_p3 0.056 0.334 0.019 0.111
(0.146) (0.878) (0.049) (0.293)
sup_parents_p3 0.452** 2.710** 0.151** 0.903**
(0.141) (0.847) (0.047) (0.282)
Num.Obs. 283 283 283 283 283 283 283 283
R2 0.332 0.389 0.420 0.420 0.332 0.389 0.420 0.420
R2 Adj. 0.323 0.371 0.394 0.394 0.323 0.371 0.394 0.394
AIC 1134.2 1117.0 1110.3 1110.3 512.4 495.2 488.5 488.5
BIC 1156.1 1153.5 1161.3 1161.3 534.2 531.7 539.5 539.5
Log.Lik. -561.090 -548.507 -541.139 -541.139 -250.183 -237.600 -230.232 -230.232
RMSE 1.76 1.68 1.64 1.64 0.59 0.56 0.55 0.55
models = list(
     "model_sum_1st_factors" = mod_sum_1st,
     "model_sum_1st_2nd_factors" = mod_sum_12,
     "model_sum_score" = mod_sum,
     "model_sum_score_norm" = mod_sum_norm,
     "model_mean_1st_factors" = mod_mean_1st,
     "model_mean_1st_2nd_factors" = mod_mean_12,
     "model_mean_score"= mod_mean,
     "model_mean_score_norm" = mod_mean_norm
    )

modelplot(models, coef_omit = 'Intercept') + geom_vline(xintercept = 0, linetype="dotted", 
                color = "black") + ggtitle("Comparing Model Parameter Estimates", "Across Covariates")

Significant Coefficients

g1 = modelplot(mod_sum, coef_omit = 'Intercept') +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"),  guide = "none")


g2 = modelplot(mod_mean, coef_omit = 'Intercept') +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"))

plot <- ggarrange(g1,g2, ncol=2, nrow=1);

Aggregate Driver Scores

df$se_acad_mean <- rowMeans(df[c('se_acad_p1', 'se_acad_p2', 'se_acad_p3')])
df$se_social_mean <- rowMeans(df[c('se_social_p1', 'se_social_p2', 'se_social_p3')])
df$sup_friends_mean <- rowMeans(df[c('sup_friends_p1', 'sup_friends_p2', 'sup_friends_p3')])
df$sup_parents_mean <- rowMeans(df[c('sup_parents_p1', 'sup_parents_p2', 'sup_parents_p3')])


formula_parcel_mean = "ls_mean ~ se_acad_mean + se_social_mean + sup_friends_mean + sup_parents_mean"

formula_parcel_sum = "ls_sum ~ se_acad_mean + se_social_mean + sup_friends_mean + sup_parents_mean"

mod_sum_parcel = lm(formula_parcel_sum, df)
mod_mean_parcel = lm(formula_parcel_mean, df)

models_parcel = list("model_sum_score" = mod_sum_parcel,
     "model_mean_score"= mod_mean_parcel
     )

modelsummary(models_parcel, stars=TRUE)
tinytable_or59mjdhizio3l7k66t7
model_sum_score model_mean_score
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 4.378*** 1.459***
(1.036) (0.345)
se_acad_mean 0.252 0.084
(0.176) (0.059)
se_social_mean 1.205*** 0.402***
(0.195) (0.065)
sup_friends_mean 0.049 0.016
(0.108) (0.036)
sup_parents_mean 0.685*** 0.228***
(0.111) (0.037)
Num.Obs. 283 283
R2 0.397 0.397
R2 Adj. 0.388 0.388
AIC 1105.3 483.5
BIC 1127.1 505.3
Log.Lik. -546.638 -235.731
RMSE 1.67 0.56
g1 = modelplot(mod_sum_parcel, coef_omit = 'Intercept') +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"), guide = "none")


g2 = modelplot(mod_mean_parcel, coef_omit = 'Intercept') +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"))

plot <- ggarrange(g1,g2, ncol=2, nrow=1);

Hierarchical Models

formula_hierarchy_mean = "ls_mean ~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + se_acad_p1 + se_acad_p2 + se_acad_p3 +
se_social_p1 + se_social_p2 + se_social_p3  + (1 | region)"

formula_hierarchy_sum = "ls_sum ~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + se_acad_p1 + se_acad_p2 + se_acad_p3 +
se_social_p1 + se_social_p2 + se_social_p3 + (1 | region)"

hierarchical_mean_fit <- lmer(formula_hierarchy_mean, data = df, REML = TRUE)
hierarchical_sum_fit <- lmer(formula_hierarchy_sum, data = df, REML = TRUE)

g1 = modelplot(hierarchical_mean_fit, re.form=NA) +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"),  guide = "none")

g2 = modelplot(hierarchical_sum_fit, re.form=NA) +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"))


plot <- ggarrange(g1,g2, ncol=2, nrow=1);
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).
Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).

modelsummary(list("hierarchical_mean_fit"= hierarchical_mean_fit,
                  "hierarchical_sum_fit"= hierarchical_sum_fit), 
             stars = TRUE) |> 
 style_tt(
   i = 2:25,
   j = 1:1,
   background = "#17C2AD",
   color = "white",
   italic = TRUE)
tinytable_ftl5t6kstw3adyosicbz
hierarchical_mean_fit hierarchical_sum_fit
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 0.903* 2.710*
(0.385) (1.154)
sup_parents_p1 -0.007 -0.021
(0.053) (0.160)
sup_parents_p2 0.070 0.210
(0.053) (0.159)
sup_parents_p3 0.160*** 0.480***
(0.046) (0.139)
sup_friends_p1 -0.091+ -0.274+
(0.055) (0.166)
sup_friends_p2 0.125* 0.376*
(0.059) (0.177)
sup_friends_p3 0.024 0.072
(0.048) (0.144)
se_acad_p1 -0.041 -0.122
(0.071) (0.213)
se_acad_p2 0.131+ 0.393+
(0.074) (0.223)
se_acad_p3 0.039 0.116
(0.067) (0.202)
se_social_p1 0.094 0.283
(0.075) (0.224)
se_social_p2 0.191* 0.574*
(0.081) (0.244)
se_social_p3 0.130* 0.389*
(0.059) (0.178)
SD (Intercept region) 0.160 0.481
SD (Observations) 0.549 1.648
Num.Obs. 283 283
R2 Marg. 0.424 0.424
R2 Cond. 0.469 0.469
AIC 538.4 1131.6
BIC 593.1 1186.3
ICC 0.1 0.1
RMSE 0.54 1.61

Marginal Effects

pred <- predictions(hierarchical_mean_fit, newdata = datagrid(sup_parents_p3 = 1:10, region=c('east', 'west')), re.form=NA)
pred1 <- predictions(mod_mean, newdata = datagrid(sup_parents_p2 = 1:10))

pred1 |> head() |> kableExtra::kable()
rowid estimate std.error statistic p.value s.value conf.low conf.high se_acad_p1 se_acad_p2 se_acad_p3 se_social_p1 se_social_p2 se_social_p3 sup_friends_p1 sup_friends_p2 sup_friends_p3 sup_parents_p1 sup_parents_p3 sup_parents_p2 ls_mean
1 5.232276 0.2673967 19.56747 0 280.8136 4.708188 5.756363 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 1 6.576815
2 5.287553 0.2140733 24.69973 0 445.0319 4.867977 5.707128 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 2 6.576815
3 5.342829 0.1610972 33.16525 0 798.8130 5.027085 5.658574 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 3 6.576815
4 5.398106 0.1089765 49.53461 0 Inf 5.184516 5.611696 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 4 6.576815
5 5.453383 0.0599835 90.91472 0 Inf 5.335818 5.570949 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 5 6.576815
6 5.508660 0.0334480 164.69327 0 Inf 5.443103 5.574217 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 6 6.576815

Regression Marginal Effects

g = plot_predictions(mod_mean, condition = c("sup_parents_p3"), type = "response") + ggtitle("Counterfactual Shift of Outcome: sup_parents_p3", "Holding all else Fixed")

g1 = plot_predictions(mod_mean, condition = c("sup_friends_p1"), type = "response") + ggtitle("Counterfactual Shift of Outcome: sup_friends_p1", "Holding all else Fixed")

g2 = plot_predictions(mod_mean, condition = c("se_acad_p1"), 
                      type = "response") + ggtitle("Counterfactual Shift of Outcome: se_acad_p1", "Holding all else Fixed")

plot <- ggarrange(g,g1,g2, ncol=1, nrow=3);

Hierarchical Marginal Effects

g = plot_predictions(hierarchical_mean_fit, condition = c("sup_parents_p3", "region"), type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: sup_parents_p3", "Holding all else Fixed")

g1 = plot_predictions(hierarchical_mean_fit, condition = c("sup_friends_p1", "region"), type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: sup_friends_p1", "Holding all else Fixed")

g2 = plot_predictions(hierarchical_mean_fit, condition = c("se_acad_p1", "region"), 
                      type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: se_acad_p1", "Holding all else Fixed")

plot <- ggarrange(g,g1,g2, ncol=1, nrow=3);

Confirmatory Factor Analysis

model_measurement <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ sup_friends_p1 + sup_friends_p2 + sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS  =~ ls_p1 + ls_p2 + ls_p3
"

model_measurement1 <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ a1*sup_friends_p1 + a2*sup_friends_p2 + a3*sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS  =~ ls_p1 + ls_p2 + ls_p3

a1 == a2 
a1 == a3
"


fit_mod <- cfa(model_measurement, data = df)
fit_mod_1<- cfa(model_measurement1, data = df)


cfa_models = list("full_measurement_model" = fit_mod, 
     "measurement_model_reduced" = fit_mod_1)
modelplot(cfa_models)

summary(fit_mod, fit.measures = TRUE, standardized = TRUE) 
lavaan 0.6-18 ended normally after 46 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        40

  Number of observations                           283

Model Test User Model:
                                                      
  Test statistic                               207.137
  Degrees of freedom                                80
  P-value (Chi-square)                           0.000

Model Test Baseline Model:

  Test statistic                              2586.651
  Degrees of freedom                               105
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.949
  Tucker-Lewis Index (TLI)                       0.933

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)              -4394.212
  Loglikelihood unrestricted model (H1)      -4290.643
                                                      
  Akaike (AIC)                                8868.423
  Bayesian (BIC)                              9014.241
  Sample-size adjusted Bayesian (SABIC)       8887.401

Root Mean Square Error of Approximation:

  RMSEA                                          0.075
  90 Percent confidence interval - lower         0.062
  90 Percent confidence interval - upper         0.088
  P-value H_0: RMSEA <= 0.050                    0.001
  P-value H_0: RMSEA >= 0.080                    0.263

Standardized Root Mean Square Residual:

  SRMR                                           0.056

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Expected
  Information saturated (h1) model          Structured

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  SUP_Parents =~                                                        
    sup_parents_p1    1.000                               0.938    0.876
    sup_parents_p2    1.034    0.056   18.570    0.000    0.970    0.888
    sup_parents_p3    0.988    0.059   16.637    0.000    0.927    0.811
  SUP_Friends =~                                                        
    sup_friends_p1    1.000                               1.021    0.906
    sup_friends_p2    0.793    0.043   18.466    0.000    0.809    0.857
    sup_friends_p3    0.891    0.050   17.735    0.000    0.910    0.831
  SE_Academic =~                                                        
    se_acad_p1        1.000                               0.697    0.880
    se_acad_p2        0.806    0.049   16.278    0.000    0.561    0.819
    se_acad_p3        0.952    0.058   16.491    0.000    0.663    0.828
  SE_Social =~                                                          
    se_social_p1      1.000                               0.640    0.846
    se_social_p2      0.959    0.055   17.338    0.000    0.614    0.881
    se_social_p3      0.926    0.066   13.956    0.000    0.593    0.742
  LS =~                                                                 
    ls_p1             1.000                               0.677    0.729
    ls_p2             0.820    0.078   10.488    0.000    0.555    0.762
    ls_p3             0.744    0.109    6.809    0.000    0.504    0.459

Covariances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  SUP_Parents ~~                                                        
    SUP_Friends       0.134    0.064    2.094    0.036    0.140    0.140
    SE_Academic       0.219    0.046    4.717    0.000    0.335    0.335
    SE_Social         0.284    0.046    6.239    0.000    0.473    0.473
    LS                0.360    0.056    6.455    0.000    0.567    0.567
  SUP_Friends ~~                                                        
    SE_Academic       0.071    0.047    1.493    0.135    0.100    0.100
    SE_Social         0.195    0.046    4.262    0.000    0.299    0.299
    LS                0.184    0.053    3.500    0.000    0.267    0.267
  SE_Academic ~~                                                        
    SE_Social         0.274    0.036    7.525    0.000    0.613    0.613
    LS                0.212    0.039    5.407    0.000    0.449    0.449
  SE_Social ~~                                                          
    LS                0.330    0.043    7.650    0.000    0.761    0.761

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .sup_parents_p1    0.268    0.037    7.143    0.000    0.268    0.233
   .sup_parents_p2    0.253    0.038    6.595    0.000    0.253    0.212
   .sup_parents_p3    0.446    0.048    9.262    0.000    0.446    0.342
   .sup_friends_p1    0.228    0.040    5.663    0.000    0.228    0.179
   .sup_friends_p2    0.237    0.030    7.926    0.000    0.237    0.266
   .sup_friends_p3    0.371    0.042    8.811    0.000    0.371    0.310
   .se_acad_p1        0.141    0.022    6.487    0.000    0.141    0.226
   .se_acad_p2        0.154    0.018    8.648    0.000    0.154    0.329
   .se_acad_p3        0.202    0.024    8.397    0.000    0.202    0.315
   .se_social_p1      0.163    0.020    8.095    0.000    0.163    0.284
   .se_social_p2      0.109    0.016    6.782    0.000    0.109    0.224
   .se_social_p3      0.287    0.028   10.141    0.000    0.287    0.450
   .ls_p1             0.404    0.048    8.422    0.000    0.404    0.469
   .ls_p2             0.223    0.029    7.624    0.000    0.223    0.420
   .ls_p3             0.951    0.085   11.123    0.000    0.951    0.789
    SUP_Parents       0.880    0.098    8.937    0.000    1.000    1.000
    SUP_Friends       1.042    0.111    9.405    0.000    1.000    1.000
    SE_Academic       0.485    0.054    8.908    0.000    1.000    1.000
    SE_Social         0.410    0.048    8.459    0.000    1.000    1.000
    LS                0.458    0.072    6.323    0.000    1.000    1.000
g1 = plot_heatmap(cov(df[,  drivers]))

g2 = plot_heatmap(data.frame(fitted(fit_mod)$cov), title="Model Implied Covariances", "Fitted Values")

plot <- ggarrange(g1,g2, ncol=1, nrow=2);

Modification Indices

modindices(fit_mod, sort = TRUE, maximum.number = 6, standardized = FALSE, minimum.value = 5)
               lhs op            rhs     mi    epc
152 sup_friends_p1 ~~   se_social_p3 19.245  0.095
68     SUP_Friends =~          ls_p2 17.491  0.181
64     SUP_Friends =~   se_social_p1 17.210 -0.136
60     SUP_Friends =~ sup_parents_p3 16.063 -0.187
210          ls_p2 ~~          ls_p3 13.931 -0.140
57     SUP_Parents =~          ls_p3 13.057  0.329

Summary Global Fit Measures

summary_df = cbind(fitMeasures(fit_mod, c("chisq", "baseline.chisq", "cfi", "aic", "bic", "rmsea","srmr")),
      fitMeasures(fit_mod_1, c("chisq", "baseline.chisq", "cfi", "aic", "bic", "rmsea","srmr")))
colnames(summary_df) = c('Full Model', 'Reduced Model')

summary_df
                  Full Model Reduced Model
chisq           207.13746909  225.18095274
baseline.chisq 2586.65112811 2586.65112811
cfi               0.94876900    0.94230416
aic            8868.42313715 8882.46662079
bic            9014.24101305 9020.99360290
rmsea             0.07493739    0.07854933
srmr              0.05595908    0.05904842
semPlot::semPaths(fit_mod, whatLabels = 'est', intercepts = FALSE)

semPlot::semPaths(fit_mod_1, whatLabels = 'std', intercepts = FALSE)

Comparing the empirical and implied variance-covariance matrix

heat_df = data.frame(resid(fit_mod, type = "standardized")$cov) 
heat_df = heat_df |> as.matrix() |> melt()
colnames(heat_df) <- c("x", "y", "value")

g1 = heat_df  |> mutate(value = round(value, 2)) |>  ggplot(aes(x = x, y = y, fill = value)) +
  geom_tile() + geom_text(aes(label = value), color = "black", size = 4) +
 scale_fill_gradient2(
    high = 'dodgerblue4',
    mid = 'white',
    low = 'firebrick2'
  ) + theme(axis.text.x = element_text(angle=45)) + ggtitle("Residuals of the Sample Covariances and Model Implied Covariances", "A Visual Check of Model fit: Full Model")

heat_df = data.frame(resid(fit_mod_1, type = "standardized")$cov) 
heat_df = heat_df |> as.matrix() |> melt()
colnames(heat_df) <- c("x", "y", "value")

g2 = heat_df  |> mutate(value = round(value, 2)) |>  ggplot(aes(x = x, y = y, fill = value)) +
  geom_tile() + geom_text(aes(label = value), color = "black", size = 4) +
 scale_fill_gradient2(
    high = 'dodgerblue4',
    mid = 'white',
    low = 'firebrick2'
  ) + theme(axis.text.x = element_text(angle=45)) + ggtitle("Residuals of the Sample Covariances and Model Implied Covariances", "A Visual Check of Model fit: Reduced Model")

plot <- ggarrange(g1,g2, ncol=1, nrow=2);

anova(fit_mod)
Chi-Squared Test Statistic (unscaled)

          Df    AIC    BIC  Chisq Chisq diff Df diff         Pr(>Chisq)    
Saturated  0                 0.00                                          
Model     80 8868.4 9014.2 207.14     207.14      80 0.0000000000003209 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(fit_mod_1)
Chi-Squared Test Statistic (unscaled)

          Df    AIC  BIC  Chisq Chisq diff Df diff           Pr(>Chisq)    
Saturated  0               0.00                                            
Model     82 8882.5 9021 225.18     225.18      82 0.000000000000002776 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(fit_mod, fit_mod_1)

Chi-Squared Difference Test

          Df    AIC    BIC  Chisq Chisq diff   RMSEA Df diff Pr(>Chisq)    
fit_mod   80 8868.4 9014.2 207.14                                          
fit_mod_1 82 8882.5 9021.0 225.18     18.044 0.16836       2  0.0001208 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Structural Equation Models

model <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ sup_friends_p1 + sup_friends_p2 + sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS  =~ ls_p1 + ls_p2 + ls_p3

# Structural model 
# Regressions
LS ~ SE_Academic + SE_Social + SUP_Parents + SUP_Friends

# Residual covariances
SE_Academic ~~ SE_Social
"

fit_mod_sem <- sem(model, data = df)
modelplot(fit_mod_sem)

semPlot::semPaths(fit_mod_sem, whatLabels = 'std', intercepts = FALSE)

heat_df = data.frame(resid(fit_mod_sem, type = "standardized")$cov) 
heat_df = heat_df |> as.matrix() |> melt()
colnames(heat_df) <- c("x", "y", "value")

heat_df  |> mutate(value = round(value, 2)) |>  ggplot(aes(x = x, y = y, fill = value)) +
  geom_tile() + geom_text(aes(label = value), color = "black", size = 4) +
 scale_fill_gradient2(
    high = 'dodgerblue4',
    mid = 'white',
    low = 'firebrick2'
  ) + theme(axis.text.x = element_text(angle=45)) + ggtitle("Residuals of the Sample Covariances and Model Implied Covariances", "A Visual Check of Model fit")

Confirmatory Factor Models with PyMC

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pytensor import tensor as pt
import arviz as az
import networkx as nx



df_p = pd.read_csv('IIS.dat', sep='\s+')
df_p.head()
     PI    AD   IGC   FI   FC
0  4.00  3.38  4.67  2.6  3.2
1  2.57  3.00  3.50  2.4  2.8
2  2.29  3.29  4.83  2.0  3.4
3  2.43  3.63  4.33  3.6  3.8
4  3.00  4.00  4.83  3.4  3.8
coords = {'obs': list(range(len(df_p))), 
          'indicators': ['PI', 'AD',    'IGC', 'FI', 'FC'],
          'indicators_1': ['PI', 'AD',  'IGC'],
          'indicators_2': ['FI', 'FC'],
          'latent': ['Student', 'Faculty']
          }


obs_idx = list(range(len(df_p)))
with pm.Model(coords=coords) as model:
  
  Psi = pm.InverseGamma('Psi', 5, 10, dims='indicators')
  lambdas_ = pm.Normal('lambdas_1', 1, 10, dims=('indicators_1'))
  lambdas_1 = pm.Deterministic('lambdas1', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_1'))
  lambdas_ = pm.Normal('lambdas_2', 1, 10, dims=('indicators_2'))
  lambdas_2 = pm.Deterministic('lambdas2', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_2'))
  tau = pm.Normal('tau', 3, 10, dims='indicators')
  kappa = 0
  sd_dist = pm.Exponential.dist(1.0, shape=2)
  chol, _, _ = pm.LKJCholeskyCov('chol_cov', n=2, eta=2,
    sd_dist=sd_dist, compute_corr=True)
  ksi = pm.MvNormal('ksi', kappa, chol=chol, dims=('obs', 'latent'))

  m1 = tau[0] + ksi[obs_idx, 0]*lambdas_1[0]
  m2 = tau[1] + ksi[obs_idx, 0]*lambdas_1[1]
  m3 = tau[2] + ksi[obs_idx, 0]*lambdas_1[2]
  m4 = tau[3] + ksi[obs_idx, 1]*lambdas_2[0]
  m5 = tau[4] + ksi[obs_idx, 1]*lambdas_2[1]
  
  mu = pm.Deterministic('mu', pm.math.stack([m1, m2, m3, m4, m5]).T)
  _  = pm.Normal('likelihood', mu, Psi, observed=df_p.values)

  idata = pm.sample(nuts_sampler='numpyro', target_accept=.95)
  
summary_df = az.summary(idata, var_names=['lambdas1', 'lambdas2', 'tau', 'Psi', 'ksi', 'chol_cov_corr'], coords= {'obs': [0, 7]})

PyMC Confirmatory Factor Model

py$summary_df |> kable()
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
lambdas1[PI] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas1[AD] 0.905 0.062 0.790 1.018 0.004 0.003 313 653 1.01
lambdas1[IGC] 0.539 0.044 0.456 0.623 0.002 0.002 424 672 1.01
lambdas2[FI] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas2[FC] 0.979 0.056 0.879 1.085 0.003 0.002 485 1216 1.01
tau[PI] 3.336 0.036 3.271 3.406 0.001 0.001 632 1335 1.00
tau[AD] 3.901 0.026 3.854 3.952 0.001 0.001 414 1065 1.00
tau[IGC] 4.598 0.021 4.560 4.636 0.001 0.001 662 1590 1.00
tau[FI] 3.038 0.040 2.965 3.112 0.002 0.001 534 1226 1.01
tau[FC] 3.716 0.035 3.651 3.783 0.002 0.001 411 923 1.01
Psi[PI] 0.610 0.023 0.567 0.655 0.001 0.000 1147 2080 1.00
Psi[AD] 0.316 0.020 0.280 0.355 0.001 0.001 672 1100 1.00
Psi[IGC] 0.355 0.013 0.331 0.381 0.000 0.000 2436 2721 1.00
Psi[FI] 0.570 0.025 0.522 0.615 0.001 0.001 1117 1985 1.01
Psi[FC] 0.420 0.026 0.368 0.466 0.001 0.001 821 1239 1.00
ksi[0, Student] -0.229 0.228 -0.651 0.210 0.003 0.003 4488 3283 1.00
ksi[0, Faculty] -0.373 0.284 -0.926 0.141 0.004 0.004 4450 2851 1.00
ksi[7, Student] 0.878 0.233 0.451 1.329 0.004 0.003 3116 2890 1.00
ksi[7, Faculty] 0.862 0.274 0.351 1.386 0.004 0.003 3789 2967 1.00
chol_cov_corr[0, 0] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
chol_cov_corr[0, 1] 0.849 0.028 0.793 0.901 0.001 0.001 550 635 1.00
chol_cov_corr[1, 0] 0.849 0.028 0.793 0.901 0.001 0.001 550 635 1.00
chol_cov_corr[1, 1] 1.000 0.000 1.000 1.000 0.000 0.000 4121 4000 1.00
az.plot_trace(idata, var_names=['lambdas1', 'lambdas2', 'tau', 'Psi', 'ksi']);
/Users/nathanielforde/mambaforge/envs/pymc_causal/lib/python3.11/site-packages/arviz/stats/density_utils.py:487: UserWarning: Your data appears to have a single value or no finite values
  warnings.warn("Your data appears to have a single value or no finite values")
/Users/nathanielforde/mambaforge/envs/pymc_causal/lib/python3.11/site-packages/arviz/stats/density_utils.py:487: UserWarning: Your data appears to have a single value or no finite values
  warnings.warn("Your data appears to have a single value or no finite values")

df = pd.read_csv('sem_data.csv')
drivers = ['se_acad_p1', 'se_acad_p2',
       'se_acad_p3', 'se_social_p1', 'se_social_p2', 'se_social_p3',
       'sup_friends_p1', 'sup_friends_p2', 'sup_friends_p3', 'sup_parents_p1',
       'sup_parents_p2', 'sup_parents_p3', 'ls_p1', 'ls_p2', 'ls_p3']
       
       
coords = {'obs': list(range(len(df))), 
          'indicators': drivers,
          'indicators_1': ['se_acad_p1','se_acad_p2','se_acad_p3'],
          'indicators_2': ['se_social_p1','se_social_p2','se_social_p3'],
          'indicators_3': ['sup_friends_p1','sup_friends_p2','sup_friends_p3'],
          'indicators_4': [ 'sup_parents_p1','sup_parents_p2','sup_parents_p3'],
          'indicators_5': ['ls_p1','ls_p2', 'ls_p3'],
          'latent': ['SE_ACAD', 'SE_SOCIAL', 'SUP_F', 'SUP_P', 'LS'],
          'latent1': ['SE_ACAD', 'SE_SOCIAL', 'SUP_F', 'SUP_P', 'LS']
          }

obs_idx = list(range(len(df)))
with pm.Model(coords=coords) as model:
  
  Psi = pm.InverseGamma('Psi', 5, 10, dims='indicators')
  lambdas_ = pm.Normal('lambdas_1', 1, 10, dims=('indicators_1'))
  lambdas_1 = pm.Deterministic('lambdas1', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_1'))
  lambdas_ = pm.Normal('lambdas_2', 1, 10, dims=('indicators_2'))
  lambdas_2 = pm.Deterministic('lambdas2', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_2'))
  lambdas_ = pm.Normal('lambdas_3', 1, 10, dims=('indicators_3'))
  lambdas_3 = pm.Deterministic('lambdas3', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_3'))
  lambdas_ = pm.Normal('lambdas_4', 1, 10, dims=('indicators_4'))
  lambdas_4 = pm.Deterministic('lambdas4', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_4'))
  lambdas_ = pm.Normal('lambdas_5', 1, 10, dims=('indicators_5'))
  lambdas_5 = pm.Deterministic('lambdas5', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_5'))
  tau = pm.Normal('tau', 3, 10, dims='indicators')
  kappa = 0
  sd_dist = pm.Exponential.dist(1.0, shape=5)
  chol, _, _ = pm.LKJCholeskyCov('chol_cov', n=5, eta=2,
    sd_dist=sd_dist, compute_corr=True)
  cov = pm.Deterministic("cov", chol.dot(chol.T), dims=('latent', 'latent1'))
  ksi = pm.MvNormal('ksi', kappa, chol=chol, dims=('obs', 'latent'))

  m0 = tau[0] + ksi[obs_idx, 0]*lambdas_1[0]
  m1 = tau[1] + ksi[obs_idx, 0]*lambdas_1[1]
  m2 = tau[2] + ksi[obs_idx, 0]*lambdas_1[2]
  m3 = tau[3] + ksi[obs_idx, 1]*lambdas_2[0]
  m4 = tau[4] + ksi[obs_idx, 1]*lambdas_2[1]
  m5 = tau[5] + ksi[obs_idx, 1]*lambdas_2[2]
  m6 = tau[6] + ksi[obs_idx, 2]*lambdas_3[0]
  m7 = tau[7] + ksi[obs_idx, 2]*lambdas_3[1]
  m8 = tau[8] + ksi[obs_idx, 2]*lambdas_3[2]
  m9 = tau[9] + ksi[obs_idx, 3]*lambdas_4[0]
  m10 = tau[10] + ksi[obs_idx, 3]*lambdas_4[1]
  m11 = tau[11] + ksi[obs_idx, 3]*lambdas_4[2]
  m12 = tau[12] + ksi[obs_idx, 4]*lambdas_5[0]
  m13 = tau[13] + ksi[obs_idx, 4]*lambdas_5[1]
  m14 = tau[14] + ksi[obs_idx, 4]*lambdas_5[2]
  
  mu = pm.Deterministic('mu', pm.math.stack([m0, m1, m2, m3, m4, m5, m6, m7,
                                             m8, m9, m10, m11, m12, m13, m14]).T)
  _  = pm.Normal('likelihood', mu, Psi, observed=df[drivers].values)

  idata = pm.sample(nuts_sampler='numpyro', target_accept=.95)
  
summary_df1 = az.summary(idata, var_names=['lambdas1', 'lambdas2', 'lambdas3', 'lambdas4', 'lambdas5', 'tau', 'Psi'])

cov_df = pd.DataFrame(az.extract(idata['posterior'])['cov'].mean(axis=2))
cov_df.index = ['SE_ACAD', 'SE_SOCIAL', 'SUP_F', 'SUP_P', 'LS']
cov_df.columns = ['SE_ACAD', 'SE_SOCIAL', 'SUP_F', 'SUP_P', 'LS']

correlation_df = pd.DataFrame(az.extract(idata['posterior'])['chol_cov_corr'].mean(axis=2))
correlation_df.index = ['SE_ACAD', 'SE_SOCIAL', 'SUP_F', 'SUP_P', 'LS']
correlation_df.columns = ['SE_ACAD', 'SE_SOCIAL', 'SUP_F', 'SUP_P', 'LS']

Life Satisfaction Model

py$summary_df1 |> kable()
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
lambdas1[se_acad_p1] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas1[se_acad_p2] 0.819 0.054 0.714 0.918 0.002 0.001 1021 1949 1.00
lambdas1[se_acad_p3] 0.969 0.063 0.854 1.085 0.002 0.001 1053 1894 1.01
lambdas2[se_social_p1] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas2[se_social_p2] 0.962 0.060 0.849 1.074 0.002 0.002 646 1320 1.01
lambdas2[se_social_p3] 0.940 0.073 0.794 1.072 0.002 0.002 871 1905 1.00
lambdas3[sup_friends_p1] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas3[sup_friends_p2] 0.801 0.045 0.718 0.886 0.002 0.001 872 2224 1.00
lambdas3[sup_friends_p3] 0.905 0.053 0.801 1.000 0.001 0.001 1268 2264 1.00
lambdas4[sup_parents_p1] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas4[sup_parents_p2] 1.039 0.055 0.923 1.133 0.002 0.001 863 1558 1.00
lambdas4[sup_parents_p3] 1.008 0.063 0.894 1.130 0.002 0.001 1147 2030 1.00
lambdas5[ls_p1] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas5[ls_p2] 0.802 0.081 0.645 0.948 0.004 0.003 526 979 1.00
lambdas5[ls_p3] 1.002 0.105 0.811 1.196 0.005 0.004 402 909 1.00
tau[se_acad_p1] 5.154 0.047 5.067 5.245 0.003 0.002 338 975 1.01
tau[se_acad_p2] 5.346 0.041 5.269 5.420 0.002 0.002 346 1056 1.01
tau[se_acad_p3] 5.211 0.048 5.125 5.302 0.002 0.002 375 1136 1.01
tau[se_social_p1] 5.289 0.046 5.201 5.372 0.003 0.002 244 499 1.02
tau[se_social_p2] 5.476 0.043 5.392 5.556 0.003 0.002 228 466 1.02
tau[se_social_p3] 5.440 0.049 5.345 5.529 0.003 0.002 300 841 1.02
tau[sup_friends_p1] 5.786 0.068 5.655 5.907 0.004 0.003 360 670 1.01
tau[sup_friends_p2] 6.010 0.058 5.902 6.121 0.003 0.002 366 852 1.01
tau[sup_friends_p3] 5.991 0.066 5.874 6.121 0.003 0.002 396 899 1.01
tau[sup_parents_p1] 5.975 0.066 5.851 6.093 0.004 0.003 296 561 1.01
tau[sup_parents_p2] 5.927 0.067 5.791 6.043 0.004 0.003 288 590 1.01
tau[sup_parents_p3] 5.719 0.070 5.589 5.848 0.004 0.003 325 740 1.01
tau[ls_p1] 5.191 0.056 5.082 5.289 0.003 0.002 326 1047 1.01
tau[ls_p2] 5.777 0.045 5.691 5.860 0.002 0.002 341 910 1.01
tau[ls_p3] 5.221 0.053 5.118 5.319 0.003 0.002 324 899 1.01
Psi[se_acad_p1] 0.413 0.029 0.358 0.467 0.001 0.001 1121 1574 1.00
Psi[se_acad_p2] 0.413 0.023 0.369 0.457 0.001 0.000 2153 2702 1.00
Psi[se_acad_p3] 0.468 0.028 0.415 0.519 0.001 0.000 1941 2325 1.01
Psi[se_social_p1] 0.431 0.026 0.383 0.480 0.001 0.000 1590 1992 1.00
Psi[se_social_p2] 0.362 0.024 0.318 0.408 0.001 0.000 1364 1858 1.00
Psi[se_social_p3] 0.553 0.028 0.502 0.608 0.001 0.000 2340 2640 1.00
Psi[sup_friends_p1] 0.515 0.039 0.443 0.592 0.001 0.001 966 1617 1.01
Psi[sup_friends_p2] 0.509 0.031 0.448 0.566 0.001 0.001 1255 2321 1.00
Psi[sup_friends_p3] 0.624 0.036 0.551 0.688 0.001 0.001 2276 2577 1.00
Psi[sup_parents_p1] 0.550 0.035 0.480 0.612 0.001 0.001 1460 1849 1.00
Psi[sup_parents_p2] 0.535 0.037 0.468 0.608 0.001 0.001 1248 1822 1.00
Psi[sup_parents_p3] 0.675 0.038 0.608 0.752 0.001 0.001 1772 1861 1.00
Psi[ls_p1] 0.674 0.038 0.608 0.749 0.001 0.001 922 1837 1.00
Psi[ls_p2] 0.533 0.030 0.477 0.590 0.001 0.001 1507 2295 1.00
Psi[ls_p3] 0.623 0.036 0.552 0.687 0.001 0.001 1541 2435 1.00

fig, ax = plt.subplots(figsize=(10, 8))
az.plot_forest(idata, var_names=['lambdas1', 'lambdas2', 'lambdas3', 'lambdas4', 'lambdas5'], combined=True, ax=ax);

py$cov_df |> kable()
SE_ACAD SE_SOCIAL SUP_F SUP_P LS
SE_ACAD 0.4713384 0.2605968 0.0630514 0.2037995 0.2194228
SE_SOCIAL 0.2605968 0.3960253 0.1855672 0.2656970 0.3013191
SUP_F 0.0630514 0.1855672 1.0359366 0.1177974 0.1619751
SUP_P 0.2037995 0.2656970 0.1177974 0.8631736 0.3800464
LS 0.2194228 0.3013191 0.1619751 0.3800464 0.4122127
py$correlation_df |> kable()
SE_ACAD SE_SOCIAL SUP_F SUP_P LS
SE_ACAD 1.0000000 0.6036868 0.0902922 0.3196183 0.4991668
SE_SOCIAL 0.6036868 1.0000000 0.2901458 0.4546039 0.7484541
SUP_F 0.0902922 0.2901458 1.0000000 0.1246230 0.2489341
SUP_P 0.3196183 0.4546039 0.1246230 1.0000000 0.6396210
LS 0.4991668 0.7484541 0.2489341 0.6396210 1.0000000

Citation

BibTeX citation:
@online{forde,
  author = {Nathaniel Forde},
  title = {Confirmatory {Factor} {Analysis} and {Structural} {Equation}
    {Models}},
  langid = {en}
}
For attribution, please cite this work as:
Nathaniel Forde. n.d. “Confirmatory Factor Analysis and Structural Equation Models.”